User blog:QuickNick/Power Query
Power Query (also known as Microsoft Power BI) is a fantastic tool which, with a few clicks, can be used to extract data from the Wiki. The possible change to the format of the Series tables may cause problems for those who extract the data with Power Query so here is a sample script which is also a good introduction to many of the features in Power Query. Please note that this script was not created with the automatic step-by-step procedure but with the Advanced Editor which (though more difficult) allows me to start from scratch and write the code as I wish which means I can use my naming conventions, combine steps and add comments. //----------------------------------------------------------------- // query to load the new career page group tables //----------------------------------------------------------------- let // load the career web page into a table of career groups strPage = "http://rr3.wikia.com/wiki/Series_and_Special_Events_NEW", tblCareerPage = Web.Page(Web.Contents(strPage)), // jump to code below functions ... //---------------- funcLoadGroup = (GrpKey, Group) => let // load the group table // NOTE: for reasons I don't understand it sometimes adds the "Edit" text from the section header to the table name! tblLoad = Table.SelectRows(tblCareerPage, each Text.Upper(Caption) = Text.Upper(Group) or Text.Upper(Caption) = Text.Upper(Group & "Edit") ){0}Data, // determine if it includes a special series, promote first row and rename columns tblWork = if Table.HasColumns(tblLoad, "Series (Abbr.)") then // table does NOT include special series Table.RenameColumns( tblLoad ,{ {"Index", "IndexVer"} ,{"Series (Abbr.)", "SeriesAbbr"} ,{"Cars", "Car1"} ,{"Cars2", "Car2"} ,{"Cars3", "Car3"} ,{"Cars4", "Car4"} }) else // table includes special seres Table.RenameColumns( Table.PromoteHeaders( tblLoad ,PromoteAllScalars=true) ,{ {"Index", "IndexVer"} ,{"Series (Abbr.)", "SeriesAbbr"} ,{"Cars", "Car1"} ,{"Cars_1", "Car2"} ,{"Cars_2", "Car3"} ,{"Cars_3", "Car4"} }) , // process the table tblGroup = // 39. add the group key and group name Table.AddColumn( Table.AddColumn( // 33. group the series rows Table.Group( // 32. expand the cars sub-table Table.ExpandTableColumn( // 31. create sub-table of cars Table.AddColumn( // 22. add the series and abbreviation columns Table.AddColumn( Table.AddColumn( // 21. split the series and abbreviation Table.AddColumn( // 15. set the special-series flag Table.AddColumn( // 13. set the version Table.AddColumn( // 12. set the index Table.AddColumn( // 11. split the index and version number Table.SplitColumn( // 05. set the column types Table.TransformColumnTypes( // 00. use the above work table tblWork // 05. ,{ {"IndexVer" ,type text} ,{"SeriesAbbr", type text} ,{"Car1", type text} ,{"Car2", type text} ,{"Car3", type text} ,{"Car4", type text} }) // 11. ,"IndexVer", Splitter.SplitTextByDelimiter("(", QuoteStyle.Csv), {"_tmpIndex", "_tmpVersion"}) // 12. ,"SerIndex", each Text.Replace(Text.Replace(_tmpIndex,"#(cr)",""),"#(lf)","") ) // 13. ,"SerVer", each if _tmpVersion = "(?)" then null else Text.Replace(Text.Replace(_tmpVersion,"(",""),")","") ) // 15. ,"SerSpecial", each if SeriesAbbr = Car1 then "Y" else "N") // 21. ,"_recSeriesAbbr", each funcSplitSeriesAbbr(SeriesAbbr) ) // 22. ,"Series", each _recSeriesAbbrSeries) ,"SerAbbr", each _recSeriesAbbrSerAbbr) // 31. ,"tblCars", each funcCarsToTable(SerSpecial, Car1, Car2, Car3, Car4) ) // 32. ,"tblCars", {"Car"}) // 33. ,{"SerIndex", "SerAbbr", "Series", "SerVer", "SerSpecial"} ,{ {"tblCars", each Table.AddIndexColumn( Table.SelectColumns(_, {"Car"}) ,"CarSortKey", 1, 1) , type table} }) // 39. ,"GrpKey", each GrpKey) ,"Group", each Group) // done! in tblGroup, //---- funcSplitSeriesAbbr = (strSeriesAbbr) => let posBracket = Text.PositionOf(strSeriesAbbr, "(", Occurrence.Last), recSeriesAbbr = if posBracket >= 0 then [ Series = Text.Start(strSeriesAbbr, posBracket), SerAbbr = Text.Replace(Text.Range(strSeriesAbbr, posBracket + 1), ")", "") ] else [ Series = strSeriesAbbr, SerAbbr = null ] in recSeriesAbbr, //---- funcCarsToTable = (flagSpecial, car1, car2, car3, car4) => if Text.Upper(flagSpecial) = "Y" then null else Table.SelectRows( #table({"Car"}, { {car1}, {car2}, {car3}, {car4} }) ,each Car <> null and Car <> "") , //---------------- // use above function to load all series and cars and combine into row entries for each series with a sub-table of cars // then carry out the steps below which must be done on the data-set as a whole e.g. add the series key // expand the data so there's a separate row for each group-series-car combination //-------- // TESTING //---- // tblSeries = funcLoadGroup(1, "Amateur") // tblSeries = funcLoadGroup(2, "NASCAR") // tblSeries = funcLoadGroup(3, "Pro/Am") // tblSeries = funcLoadGroup(4, "Pro") // tblSeries = funcLoadGroup(5, "Expert") // tblSeries = funcLoadGroup(6, "Master") // tblSeries = funcLoadGroup(7, "Elite") // tblSeries = funcLoadGroup(8, "Legend") // tblSeries = funcLoadGroup(9, "Exclusive Series") //-------- /* to test the above group loads just wrap the section below in these comment characters and remove the // from the group line you wish to test */ tblSeries = // 99. return the relevant columns in the desired order Table.SelectColumns( // 12. expand the cars sub-table Table.ExpandTableColumn( // 11. add a series key Table.AddIndexColumn( // 00. load the series using the above function // this isn't ideal as I've had to hard-code the group numbers and names ... but it works! funcLoadGroup(1, "Amateur") & funcLoadGroup(2, "NASCAR") & funcLoadGroup(3, "Pro/Am") & funcLoadGroup(4, "Pro") & funcLoadGroup(5, "Expert") & funcLoadGroup(6, "Master") & funcLoadGroup(7, "Elite") & funcLoadGroup(8, "Legend") & funcLoadGroup(9, "Exclusive Series") // 11. ,"SerKey", 1, 1) // 12. ,"tblCars", {"Car", "CarSortKey"}) // 99. ,{"GrpKey", "Group", "SerKey", "SerIndex", "SerAbbr", "Series", "SerVer", "SerSpecial", "Car", "CarSortKey"}) // done! in tblSeries //----------------------------------------------------------------- Multiple commands can be combined into a single line of code strOut = Text.Tim(Text.Trim(strIn, "(" ), ")" ), with the inner-most being executed first. Something to consider/remember about PQ is that every line of code has to assign a result to something and once assigned it cannot be changed. I see no point in creating all of these unnecessary instances so I combine those lines of code. To make it easier (for me) to understand I number each command-parameters combination. The script makes use of inline functions, for those who don’t know these can be *Single-line functions e.g. **'funcCarsToTable' is a single line of code *Multi-line functions which must be wrapped in an additional let ... in construct e.g. **'funcLoadGroup' **'funcSplitSeriesAbbr' Comments can appear anywhere ... *'//' is a single-line comment, the rest of the line is a comment. *'/* ... */ ' is a short embedded comment or a multi-line comment, everything inside is ignored. // comment strGroup = "Amateur", // comment strSeries /* comment */ = "Racing School Basics", /* Comment-1 Comment-2 */ Feel free to contact me if you need help. Nick Category:Blog posts